#delimit; 
clear all;
set more off;

capture log close;

log using covid.log, replace;

import excel "United_States_COVID-19_Cases_and_Deaths_by_State_over_Time_-_ARCHIVED.xlsx", sheet("United_States_COVID-19_Cases_an") firstrow;

su;

sort state submission_date;

**** New York State's reported case and death counts do not include New York City's counts as they separately report nationally notifiable conditions to CDC ****;
gen gestfips=.;
replace gestfips=1 if state=="AL";
replace gestfips=2 if state=="AK";
replace gestfips=4 if state=="AZ";
replace gestfips=5 if state=="AR";
replace gestfips=6 if state=="CA";
replace gestfips=8 if state=="CO";
replace gestfips=9 if state=="CT";
replace gestfips=10 if state=="DE";
replace gestfips=11 if state=="DC";
replace gestfips=12 if state=="FL";
replace gestfips=13 if state=="GA";
replace gestfips=15 if state=="HI";
replace gestfips=16 if state=="ID";
replace gestfips=17 if state=="IL";
replace gestfips=18 if state=="IN";
replace gestfips=19 if state=="IA";
replace gestfips=20 if state=="KS";
replace gestfips=21 if state=="KY";
replace gestfips=22 if state=="LA";
replace gestfips=23 if state=="ME";
replace gestfips=24 if state=="MD";
replace gestfips=25 if state=="MA";
replace gestfips=26 if state=="MI";
replace gestfips=27 if state=="MN";
replace gestfips=28 if state=="MS";
replace gestfips=29 if state=="MO";
replace gestfips=30 if state=="MT";
replace gestfips=31 if state=="NE";
replace gestfips=32 if state=="NV";
replace gestfips=33 if state=="NH";
replace gestfips=34 if state=="NJ";
replace gestfips=35 if state=="NM";
replace gestfips=36 if state=="NY" | state=="NYC";
replace gestfips=37 if state=="NC";
replace gestfips=38 if state=="ND";
replace gestfips=39 if state=="OH";
replace gestfips=40 if state=="OK";
replace gestfips=41 if state=="OR";
replace gestfips=42 if state=="PA";
replace gestfips=44 if state=="RI";
replace gestfips=45 if state=="SC";
replace gestfips=46 if state=="SD";
replace gestfips=47 if state=="TN";
replace gestfips=48 if state=="TX";
replace gestfips=49 if state=="UT";
replace gestfips=50 if state=="VT";
replace gestfips=51 if state=="VA";
replace gestfips=53 if state=="WA";
replace gestfips=54 if state=="WV";
replace gestfips=55 if state=="WI";
replace gestfips=56 if state=="WY";

keep if gestfips~=.;

gen tudate=submission_date;
format tudate %tdnn/dd/CCYY;
keep tudate gestfips state tot_cases tot_death;
su;

collapse (sum) tot_death, by(tudate);

**** 2019 US total population from the 2019 ACS, https://www2.census.gov/programs-surveys/acs/tech_docs/pums/estimates/pums_estimates_19.csv ****; 
gen population=328239523;

gen cum_death_mil=tot_death*1000000/population;
gen new_death_mil=(tot_death-tot_death[_n-1])*1000000/population;

tsset tudate;
tssmooth ma new_death_ma7=new_death_mil, window(6 1 0);  /* 7 day moving average */

twoway (line new_death_ma7 tudate if tudate<mdy(1,1,2022), yaxis(1) ytitle("Daily new deaths per million")) 
       (line cum_death_mil tudate if tudate<mdy(1,1,2022), yaxis(2) ytitle("Cumulative deaths per million", axis(2)) lpattern(longdash)), 
       tline(1march2021) tlabel(1march2021 "3/1/2021", add angle(45)) xtitle("Date") legend(order(1 "Daily new deaths" 2 "Cumulative deaths")) ;
graph save covid_deaths, replace;	  

log close;